Extract email addresses from delivery-failure mail and remove them from the database


In [14]:
import re
import pprint,sys

In [2]:
# Path of the saved mail text to scan for addresses; it is a relative path,
# so it is resolved against the kernel's current working directory.
filename = "email1"

In [3]:
f=open(filename,'r')
data=f.read()
f.close()
print len(data)


74901

In [4]:
# Pattern for address-like substrings. It accepts both the plain form
# (user@host.tld, user@host.co.tld) and the obfuscated form that spells the
# separators out as "(at)" / "(dot)".
#
# The pattern contains eight capturing groups, so findall() yields one tuple
# per match; group 1 (tuple index 0) wraps the whole expression and is the
# full matched address.
#
# NOTE(review): the local-part class [\w.] does not include '-' or '+', so a
# hyphenated address is captured only from the character after the last
# hyphen (e.g. "mailer-daemon@googlemail.com" would come out as
# "daemon@googlemail.com"). The de-duplication cell excludes exactly
# "daemon@googlemail.com", presumably relying on this truncation -- widen the
# class and that exclusion together, not separately.
# NOTE(review): the host label is \w+ as well, so hyphenated domains are not
# matched in full either.
mailRegex = re.compile(r'([\w.]+(@|\(at\))\w+(\.|\(dot\))((\w){2,7}(\.|\(dot\))(\w){2,5}|(\w){2,7}))')

In [10]:
res=mailRegex.findall(data)
print len(res),"emails found"


487 emails found

In [11]:
#Removing duplicates & extras
ls=[]
for email in res:
    if email[0] != "daemon@googlemail.com" and email[0] != "sharecoupans@gmail.com":
        if email[0] not in ls:
            ls.append(email[0])
print "total email to remove ",len(ls)


total email to remove  82

In [15]:
# Pretty-print the de-duplicated list so it can be inspected by eye before
# the confirmation prompt and the database removal further down.
# NOTE(review): this writes real e-mail addresses into the saved notebook
# output; clear the output before sharing or committing the notebook.
pprint.pprint(ls)


['007pru@gmail.com',
 'CAG3Hmb8Ysvb7QswrjNauKNfA_NasRC8YLg4Le86RYqknDOTS7w@mail.gmail.com',
 '007.apurv@gmail.com',
 '007bond.jatin@gmail.com',
 '100rbh100@gmail.com',
 '019.amit@gmail.com',
 '03vc96@gmail.com',
 '09691a04b1@gmail.com',
 '05keerthi.s@gmail.com',
 '00sahi@gmail.com',
 '1.jaygala@gmail.com',
 '007kravi@gmail.com',
 '004upadhyay@gmail.com',
 '070790vivek@gmail.com',
 '101tyagi@gmail.com',
 '07.kishan@gmail.com',
 '09mohit1994@gmail.com',
 '019ashka@gmail.com',
 '06sid1991@gmail.com',
 '01paresh01@gmail.com',
 '019mohit.kumar@gmail.com',
 '0dwivediashish0@gmail.com',
 '03.msahai03@gmail.com',
 '07jaisani@gmail.com',
 '04051995feb@gmail.com',
 '007nelsonrodrigues@gmail.com',
 '05midhun09@gmail.com',
 '050277sun@gmail.com',
 '02oodp14@gmail.com',
 '007rishabhjain@gmail.com',
 '07aakash03@gmail.com',
 '007samyakjain@gmail.com',
 '1012abhik@gmail.com',
 '09nasa10@gmail.com',
 '07aryan07@gmail.com',
 '1.anamika.singh@gmail.com',
 '0123.parvej@gmail.com',
 '0984.durgesh@gmail.com',
 '0marfernandes15@gmail.com',
 '0406.nishtha@gmail.com',
 '10.kashif@gmail.com',
 '0201shivank93@gmail.com',
 '07249512mkt@gmail.com',
 '021.paul@gmail.com',
 '0467nancy@gmail.com',
 '01prateekkanwal@gmail.com',
 '06abhaygupta@gmail.com',
 '028upasana820@gmail.com',
 '04pankajb@gmail.com',
 '0808nkm@gmail.com',
 '01.0100n.00@gmail.com',
 '01akash47@gmail.com',
 '1.vinay.rawat@gmail.com',
 '0abhinav@gmail.com',
 '010naveen@gmail.com',
 '07.aritra@gmail.com',
 '1.deepak.r@gmail.com',
 '007akshayjain@gmail.com',
 '07chaingang@gmail.com',
 '01.harshita@gmail.com',
 '050meenusharma@gmail.com',
 '02051996ravi@gmail.com',
 '007expo@gmail.com',
 '1.gurpreet@gmail.com',
 '007animator@gmail.com',
 '013ayush@gmail.com',
 '09augarunyadav@gmail.com',
 '007.rassel@gmail.com',
 '01010raj@gmail.com',
 '09anshul09@gmail.com',
 '00edy00@gmail.com',
 '007aru@gmail.com',
 '007prosenjit@gmail.com',
 '007yuvraj@gmail.com',
 '008dasrup12@gmail.com',
 '009basith@gmail.com',
 '007.sohit@gmail.com',
 '08.renuka.m@gmail.com',
 '07.harsh@gmail.com',
 '006hacker@gmail.com',
 '007mohan6@gmail.com',
 '007amanbansal@gmail.com']

In [16]:
print "R you sure to remove"
check=raw_input("y/n")
if check=='n':
    sys.exit()


R you sure to remove
y/ny

Removing from database


In [17]:
from pymongo import MongoClient

In [21]:
# MongoClient() is called without arguments, so pymongo's default connection
# settings are used.
client = MongoClient()

# Database "Emaildb", collection "data" (item access is equivalent to the
# attribute access form).
db = client["Emaildb"]

# Document count used as the "before" figure when reporting how many
# documents were removed. This cell must therefore run BEFORE the removal loop.
# NOTE(review): the saved execution counts show this cell as In [21] and the
# removal loop as In [19], i.e. the baseline appears to have been taken after
# the removal -- which would explain a reported difference of 0. Confirm
# with Restart & Run All.
inidata = db["data"].count()

In [19]:
for email in ls:
    try:
        db.data.remove({"email" : email})
    except:
        print "email not found"

In [22]:
print inidata-db.data.count(),"Documents removed"


0 Documents removed

In [ ]: